library(DBI)
library(kableExtra)
library(ggplot2)
library(dplyr)
library(knitr)
library(DT)
# Establish MySQL database connection
con <- dbConnect(
RMySQL::MySQL(),
dbname = "practicum2",
host = "database-2.cr84qqy6ksk7.us-east-2.rds.amazonaws.com",
port = 3306,
user = "admin",
password = "practicum2"
)
# Checking if the connection has been successfully established
if (dbIsValid(con)) {
cat("Connection to MySQL database successful!\n")
} else {
cat("Failed to connect to the MySQL database.\n")
}
Connection to MySQL database successful!
###Analytical Query I: Top five sales reps with the most sales broken down by year. So, for each year for which there is data, the top five reps for that year. Think about how you would best present this information so it is easily usable by an analyst or manager
query1 <- "
WITH ranked_reps AS (
SELECT
year,
repName,
SUM(totalSold) AS total_sales,
ROW_NUMBER() OVER (PARTITION BY year ORDER BY SUM(totalSold) DESC) AS sales_rank
FROM
rep_facts
GROUP BY
year, repName
)
SELECT
year,
repName,
total_sales
FROM
ranked_reps
WHERE
sales_rank <= 5
ORDER BY
year, total_sales DESC;
"
result1 <- dbGetQuery(con, query1)
Warning: Decimal MySQL column 2 imported as numeric
# Formatting the data for presentation
datatable(result1,
options = list(
stripeClasses = c('odd', 'even'), # Apply alternating row colors
paging = TRUE, # Enable pagination if needed
searching = TRUE # Enable search functionality
)
)
NA
NA
###Analytical Query II: Total sold per product per quarter. Think about how to best display this information.
AQ2 <- "
SELECT
productName,
quarter,
SUM(totalUnitsSold) AS totalUnitsSold
FROM product_facts
GROUP BY productName, quarter
ORDER BY
productName,
quarter;
"
# Execute the query and store the result
resultII <- dbGetQuery(con, AQ2)
Warning: Decimal MySQL column 2 imported as numeric
# Arrange the result by productName, and quarter
resultII <- arrange(resultII, productName, quarter)
datatable(resultII,
options = list(
paging = TRUE, # Enable pagination
searching = TRUE, # Enable search functionality
ordering = TRUE # Enable column sorting
),
caption = "Average Sales per Sales Rep Over the Years"
)
NA
NA
NA
# Execute the query and store the result
AQ3 <- "
SELECT
productName,
territory,
SUM(totalUnitsSold) AS totalUnitsSold
FROM product_facts
GROUP BY productName, territory
"
resultIII <- dbGetQuery(con, AQ3)
# Arrange the result
resultIII <- arrange(resultIII, productName, territory)
# Formatting the data for presentation
result <- resultIII %>%
kable("html") %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)
result
| productName | territory | totalUnitsSold |
|---|---|---|
| Alaraphosol | EMEA | 510600 |
| Alaraphosol | East | 296500 |
| Alaraphosol | South America | 475500 |
| Alaraphosol | West | 294900 |
| Bhiktarvizem | EMEA | 467100 |
| Bhiktarvizem | East | 351600 |
| Bhiktarvizem | South America | 480300 |
| Bhiktarvizem | West | 351300 |
| Clobromizen | EMEA | 478600 |
| Clobromizen | East | 355300 |
| Clobromizen | South America | 486600 |
| Clobromizen | West | 392400 |
| Colophrazen | EMEA | 533500 |
| Colophrazen | East | 353000 |
| Colophrazen | South America | 493100 |
| Colophrazen | West | 331000 |
| Diaprogenix | EMEA | 506400 |
| Diaprogenix | East | 312900 |
| Diaprogenix | South America | 534000 |
| Diaprogenix | West | 350700 |
| Gerantrazeophem | EMEA | 524900 |
| Gerantrazeophem | East | 383500 |
| Gerantrazeophem | South America | 534000 |
| Gerantrazeophem | West | 329600 |
| Presterone | EMEA | 499000 |
| Presterone | East | 324700 |
| Presterone | South America | 470800 |
| Presterone | West | 337700 |
| Proxinostat | EMEA | 500800 |
| Proxinostat | East | 360200 |
| Proxinostat | South America | 522000 |
| Proxinostat | West | 388500 |
| Xinoprozen | EMEA | 498400 |
| Xinoprozen | East | 324700 |
| Xinoprozen | South America | 506800 |
| Xinoprozen | West | 304800 |
| Xipramin | EMEA | 225500 |
| Xipramin | East | 188300 |
| Xipramin | South America | 275700 |
| Xipramin | West | 164400 |
| Zalofen | EMEA | 274200 |
| Zalofen | East | 153100 |
| Zalofen | South America | 254400 |
| Zalofen | West | 165900 |
library(plotly)
# Assuming 'result3' is your data frame containing the query result
# Create the line plot
line_plot <- plot_ly(resultIII, x = ~territory, y = ~totalUnitsSold, color = ~productName, type = 'scatter', mode = 'lines+markers') %>%
layout(title = "Total Units Sold by Territory",
xaxis = list(title = "Territory"),
yaxis = list(title = "Total Units Sold"),
legend = list(orientation = "h"))
# Print the line plot
line_plot
NA
###Analytical Query IV: Average sales per sales rep over the years. Show the result in a line graph visualization.
library(DT)
# Execute the SQL query and fetch the result
AQ4 <- "
SELECT
rf.repName,
rf.year,
AVG(rf.totalSold) AS avg_sales
FROM
rep_facts rf
JOIN
product_facts pf ON rf.productID = pf.productID
GROUP BY
rf.repName, rf.year;
"
resultIV <- dbGetQuery(con, AQ4)
Warning: Decimal MySQL column 2 imported as numeric
# Display the result in table format
datatable(resultIV,
options = list(
paging = TRUE, # Enable pagination
searching = TRUE, # Enable search functionality
ordering = TRUE # Enable column sorting
),
caption = "Average Sales per Sales Rep Over the Years"
)
library(ggplot2)
# Plot the line graph
ggplot(resultIV, aes(x = year, y = avg_sales, group = repName, color = repName)) +
geom_line() +
geom_point() +
labs(title = "Average Sales per Sales Rep Over the Years",
x = "Year",
y = "Average Sales") +
theme_minimal() +
theme(legend.position = "bottom") +
scale_color_viridis_d()
dbDisconnect(con)
[1] TRUE
This report provides valuable insights into our sales performance, highlighting trends and patterns that can inform strategic decision-making. By understanding our sales data more comprehensively, we can identify opportunities for growth and optimize our sales strategies.